This Jupyter notebook is intended to be used alongside the book Python for Bioinformatics.
STRINGS
In [2]:
"This is a string in Python"
'This is a string in Python'
'''This is a string in Python'''
"""This is a string in Python"""
Out[2]:
In [4]:
"A single quote (’) inside a double quote"
'Here we have "double quotes" inside single quotes'
Out[4]:
In [5]:
"Mixing quotes leads to the dark side'
In [6]:
"""Hi! I'm a
multiline
string"""
Out[6]:
In [7]:
"Hi! I'm a\nmultiline\n string"
Out[7]:
Strings are sequences of Unicode characters
In [4]:
'In Python 3, strings are Unicode: こんにちは 世界'
Out[4]:
String Manipulation
In [4]:
signal_peptide = 'MASKATLLLAFTLLFATCIA'
In [5]:
signal_peptide.lower()
Out[5]:
In [6]:
signal_peptide
Out[6]:
In [7]:
signal_peptide = signal_peptide.lower()
signal_peptide
Out[7]:
In [8]:
dna_seq = 'GCTAGTAATGTG'
m_rna_seq = dna_seq.replace('T','U')
m_rna_seq
Out[8]:
In [9]:
dna_seq
Out[9]:
In [10]:
# GC content: the share of C and G bases in dna_seq, expressed as a percentage.
c, g = dna_seq.count("C"), dna_seq.count("G")
(c + g) / len(dna_seq) * 100
Out[10]:
In [11]:
m_rna_seq
Out[11]:
In [12]:
m_rna_seq.find('AUG')
Out[12]:
In [13]:
m_rna_seq.find('GGG')
Out[13]:
In [14]:
'This string has words separated by spaces'.split()
Out[14]:
In [15]:
"Alex Doe,5555-2333,nobody@example.com".split()
Out[15]:
In [16]:
"Alex Doe,5555-2333,nobody@example.com".split(",")
Out[16]:
In [17]:
''.join(['A','C','A','T'])
Out[17]:
List Is a Basic Datatype in Python
In [18]:
'Alex Doe,5555-2333,hi@example.com'.split(',')
Out[18]:
In [19]:
first_list = [1, 2, 3, 4, 5]
In [20]:
other_list = [1, 'two', 3, 4, 'last']
In [21]:
nested_list = [1, 'two', first_list, 4, 'last']
nested_list
Out[21]:
In [22]:
empty_list = []
empty_list
Out[22]:
In [23]:
first_list = [1, 2, 3, 4, 5]
first_list[0]
Out[23]:
In [24]:
first_list[1]
Out[24]:
In [25]:
first_list = [1, 2, 3, 4, 5]
first_list[-1]
Out[25]:
In [26]:
first_list[-4]
Out[26]:
In [27]:
aseq = "atggctaggc"
list(aseq)
Out[27]:
In [28]:
samples = ['red'] * 5
samples
Out[28]:
In [29]:
samples = [None] * 5
samples
Out[29]:
In [30]:
a = [0, 1, 2, 3, 4, 5]
In [31]:
[3*x for x in a]
Out[31]:
In [32]:
# Names padded with stray leading and/or trailing spaces.
animals = [
    ' King Kong',
    ' Godzilla ',
    'Gamera ',
]
# strip() removes the whitespace at both ends of each name.
[name.strip() for name in animals]
Out[32]:
In [33]:
animals = [' King Kong', ' Godzilla ', 'Gamera ']
[x.strip() for x in animals if 'i' in x]
Out[33]:
Modifying Lists
In [34]:
first_list.append(99)
first_list
Out[34]:
In [35]:
first_list.insert(2,50)
first_list
Out[35]:
In [36]:
first_list.extend([6,7,8])
first_list
Out[36]:
In [37]:
[1,2,3]+[4,5]
Out[37]:
In [38]:
first_list
Out[38]:
In [39]:
first_list.pop()
Out[39]:
In [40]:
first_list.pop(2)
Out[40]:
In [41]:
first_list
Out[41]:
In [42]:
first_list.remove(99)
first_list
Out[42]:
In [43]:
first_list
Out[43]:
In [44]:
# list.remove() raises ValueError when the value is not in the list, which is
# the case for 10 here if the cells above were run in order. The error is the
# point of this cell, so catch it and show its message instead of letting it
# stop a top-to-bottom "Run All".
try:
    first_list.remove(10)
except ValueError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
In [45]:
a = [1, 2, 3]
b = a
b.pop()
Out[45]:
In [46]:
a
Out[46]:
In [47]:
import copy
a = [1, 2, 3]
b = copy.copy(a)
b.pop()
Out[47]:
In [48]:
a
Out[48]:
In [49]:
a = [1, 2, 3]
b = a[:]
b.pop()
Out[49]:
In [50]:
a
Out[50]:
In [51]:
point = (23, 56, 11)
In [52]:
# Tuples are immutable and have no append() method, so this call raises
# AttributeError. The error is the point of this cell, so catch it and show
# its message instead of letting it stop a top-to-bottom "Run All".
try:
    point.append(3)
except AttributeError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
In [53]:
# Tuples are immutable and have no pop() method, so this call raises
# AttributeError. The error is the point of this cell, so catch it and show
# its message instead of letting it stop a top-to-bottom "Run All".
try:
    point.pop()
except AttributeError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
Common Properties of the Sequences
In [5]:
point = (23, 56, 11)
point[0]
Out[5]:
In [55]:
point[1]
Out[55]:
In [8]:
my_sequence = 'MRVLLVALALLALAASATS'
my_sequence[0]
Out[8]:
In [65]:
my_sequence[5]
Out[65]:
In [2]:
parameters = ['UniGene', 'dna', 'Mm.248907', 5]
parameters[2]
Out[2]:
In [6]:
point[-1]
Out[6]:
In [61]:
point[-2]
Out[61]:
In [66]:
my_sequence[-2]
Out[66]:
In [67]:
my_sequence[-4]
Out[67]:
In [9]:
my_sequence[-1]
Out[9]:
In [69]:
seqdata = ('MRVLLVALALLA', 12, '5FE9EEE8EE2DC2C7')
seqdata[0][5]
Out[69]:
In [70]:
my_sequence="Python"
my_sequence[0:2]
Out[70]:
In [71]:
my_sequence[:2]
Out[71]:
In [72]:
my_sequence="Python"
my_sequence[4:6]
Out[72]:
In [73]:
my_sequence[4:]
Out[73]:
In [74]:
my_sequence[1:5]
Out[74]:
In [75]:
my_sequence[1:5:2]
Out[75]:
In [76]:
my_sequence[::-1]
Out[76]:
In [77]:
point = (23, 56, 11)
11 in point
Out[77]:
In [78]:
my_sequence = 'MRVLLVALALLALAASATS'
'X' in my_sequence
Out[78]:
In [79]:
point = (23, 56, 11)
point2 = (2, 6, 7)
point + point2
Out[79]:
In [80]:
dna_seq = 'ATGCTAGACGTCCTCAGATAGCCG'
tata_box = 'TATAAA'
tata_box + dna_seq
Out[80]:
In [81]:
# "+" concatenates two sequences of the same type only; adding a tuple (point)
# and a string (tata_box) raises TypeError. The error is the point of this
# cell, so catch it and show its message instead of letting it stop a
# top-to-bottom "Run All".
try:
    point + tata_box
except TypeError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
In [82]:
point = (23, 56, 11)
len(point)
Out[82]:
In [83]:
my_sequence = 'MRVLLVALALLALAASATS'
len(my_sequence)
Out[83]:
In [84]:
point
Out[84]:
In [85]:
max(point)
Out[85]:
In [86]:
min(point)
Out[86]:
In [87]:
my_sequence = 'MRVLLVALALLALAASATS'
max(my_sequence)
Out[87]:
In [88]:
min(my_sequence)
Out[88]:
In [89]:
tata_box = 'TATAAA'
list(tata_box)
Out[89]:
Dictionaries
In [90]:
# Map one-letter amino acid codes to their three-letter abbreviations.
iupac = {
    'A': 'Ala',
    'C': 'Cys',
    'E': 'Glu',
}
# Look up a value by its key and interpolate it into the message.
print('C stands for the amino acid {0}'.format(iupac['C']))
In [91]:
iupac['E']
Out[91]:
In [5]:
rgb = [('red','ff0000'), ('green','00ff00'), ('blue','0000ff')]
colors_d = dict(rgb)
colors_d
Out[5]:
In [93]:
rgb = dict(red='ff0000', green='00ff00', blue='0000ff')
rgb
Out[93]:
In [94]:
rgb = {}
rgb['red'] = 'ff0000'
rgb['green'] = '00ff00'
rgb
Out[94]:
In [95]:
len(iupac)
Out[95]:
In [96]:
iupac['S'] = 'Ser'
len(iupac)
Out[96]:
In [97]:
iupac = {'A':'Ala','C':'Cys','E':'Glu'}
iupac
Out[97]:
In [98]:
iupac['X'] = 'Xaa'
iupac
Out[98]:
In [99]:
from collections import OrderedDict

# Build the ordered mapping in one step from (key, value) pairs; the entries
# keep the order in which the pairs are listed.
d = OrderedDict([('a', 'A'), ('b', 'B'), ('c', 'C')])
d
Out[99]:
In [100]:
iupac
Out[100]:
In [101]:
iupac.keys()
Out[101]:
In [102]:
iupac.values()
Out[102]:
In [103]:
iupac.values()
Out[103]:
In [104]:
iupac.keys()
Out[104]:
In [105]:
iupac_keys = iupac.keys()
iupac_vals = iupac.values()
iupac.pop('X')
Out[105]:
In [106]:
iupac_keys
Out[106]:
In [107]:
iupac_vals
Out[107]:
In [15]:
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
iupac.items()
Out[15]:
In [109]:
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
iupac.get('A','No translation available')
Out[109]:
In [110]:
iupac.get('Z','No translation available')
Out[110]:
In [17]:
iupac.get('Z')
In [114]:
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
del iupac['A']
iupac
Out[114]:
In [117]:
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}
In [118]:
first_set = set()
first_set.add('CP0140.1')
first_set.add('XJ8113.5')
first_set.add('EF3616.3')
first_set
Out[118]:
In [119]:
{2*x for x in [1,2,3]}
Out[119]:
In [120]:
first_set.add('CP0140.1')
first_set
Out[120]:
In [121]:
{2*x for x in [1,1,2,2,3,3]}
Out[121]:
In [122]:
uniques = {2,2,3,4,5,3}
uniques
Out[122]:
In [123]:
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}
other_set = {'EF3616.3'}
common = first_set.intersection(other_set)
common
Out[123]:
In [124]:
common = first_set & other_set
common
Out[124]:
In [125]:
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}
other_set = {'AB7416.2'}
first_set.union(other_set)
Out[125]:
In [126]:
first_set | other_set
Out[126]:
In [127]:
first_set.difference(other_set)
Out[127]:
In [128]:
first_set - other_set
Out[128]:
In [129]:
other_set - first_set
Out[129]:
In [130]:
first_set.symmetric_difference(other_set)
Out[130]:
In [132]:
first_set ^ other_set
Out[132]:
In [133]:
first_set
Out[133]:
In [134]:
list(first_set)
Out[134]:
In [135]:
fs = frozenset(['a','b'])
fs
Out[135]:
In [136]:
# A frozenset is immutable and has no remove() method, so this call raises
# AttributeError. The error is the point of this cell, so catch it and show
# its message instead of letting it stop a top-to-bottom "Run All".
try:
    fs.remove('a')
except AttributeError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
In [137]:
# A frozenset is immutable and has no add() method, so this call raises
# AttributeError. The error is the point of this cell, so catch it and show
# its message instead of letting it stop a top-to-bottom "Run All".
try:
    fs.add('c')
except AttributeError as err:
    print('{0}: {1}'.format(type(err).__name__, err))
Naming Objects
In [1]:
# Intentionally invalid: none of these lines is legal Python. The cell is
# rejected with a SyntaxError before any of it runs.
23crm = "1" # Starts with a digit
23 = "1" # Is a number literal, not a name, so it cannot be assigned to
Var? = "value" # Has an invalid character (?).
$five = 5 # Has an invalid character ($)
for = 123 # Is a reserved word
if = "data" # Is a reserved word
In [142]:
# Each line binds a legal name to an object of a different built-in type.
my_sequence = 'MRVLLVALALLALAASATS'                 # str
first_list = [1, 2, 3, 4, 5]                        # list
d = {1: 'a', 2: 'b', 3: 'c'}                        # dict
k = d.keys()                                        # view of the keys of d
point = (23, 56, 11)                                # tuple
first_set = {'CP0140.1', 'XJ8113.5', 'EF3616.3'}    # set
fs = frozenset(['a', 'b'])                          # frozenset
In [10]:
a = 3
b = [1,2,a]
In [11]:
b
Out[11]:
In [145]:
a = 5
b
Out[145]:
In [146]:
c = [1, 2, 3]
d = [5, 6, c]
In [147]:
c
Out[147]:
In [148]:
d
Out[148]:
In [149]:
c.pop()
Out[149]:
In [150]:
c
Out[150]:
In [151]:
d
Out[151]:
In [152]:
a = 3
b = [1, 2, a]
In [153]:
b
Out[153]:
In [154]:
a = 5
b
Out[154]:
In [13]:
c = [1, 2, 3]
d = [5, 6, c]
In [156]:
c
Out[156]:
In [157]:
d
Out[157]:
In [158]:
c.pop()
Out[158]:
In [160]:
print(c)
In [14]:
print(d)